package util;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;

/**
 * Helpers for extracting the text of a PDF with PDFBox and dumping it to a
 * plain-text file.
 *
 * <p>The output location is fixed by {@link #txtFilePath}; the {@code %s}
 * placeholder is filled with the part of the input file name that precedes
 * its first dot.
 */
public class PdfUtil {

    /** Output file pattern; {@code %s} receives the input file's base name. */
    private static final String txtFilePath = "G:\\AITXT\\1-s2.0-%s-main.txt";

    /**
     * Reads all text from a PDF, writes it to the derived text file and
     * returns the text.
     *
     * <p>NOTE(review): the name looks like a typo for "readPdfFile"; it is
     * kept unchanged so existing callers keep compiling.
     *
     * @param inputFile the PDF to read
     * @return the extracted text; an empty string if the PDF could not be
     *         read. If extraction succeeds but writing the text file fails,
     *         the extracted text is still returned.
     */
    public static String readRdfFile(File inputFile) {
        String content = "";
        try {
            content = extractText(inputFile);
            writeText(resolveTxtFile(inputFile), content);
        } catch (Exception e) {
            // Best-effort API: failures are reported but never propagated.
            e.printStackTrace();
        }
        return content;
    }

    /**
     * Reads all text from a PDF, writes it to the derived text file and
     * returns that file.
     *
     * @param inputFile the PDF to read
     * @return the target text file, or {@code null} if the PDF could not be
     *         read. If only the write step fails, the (possibly incomplete
     *         or missing) target file is still returned.
     */
    public static File getTxtFileFromPdf(File inputFile) {
        File txtFile = null;
        try {
            String content = extractText(inputFile);
            txtFile = resolveTxtFile(inputFile);
            writeText(txtFile, content);
        } catch (Exception e) {
            // Best-effort API: failures are reported but never propagated.
            e.printStackTrace();
        }
        return txtFile;
    }

    /** Loads the PDF and returns its full text; the document is always closed. */
    private static String extractText(File inputFile) throws IOException {
        try (PDDocument doc = PDDocument.load(inputFile)) {
            PDFTextStripper textStripper = new PDFTextStripper();
            return textStripper.getText(doc);
        }
    }

    /** Builds the output file from the input name's segment before the first dot. */
    private static File resolveTxtFile(File inputFile) {
        String baseName = inputFile.getName().split("\\.")[0];
        return new File(String.format(txtFilePath, baseName));
    }

    /** Writes the text to the file; the writer is always closed. */
    private static void writeText(File txtFile, String content) throws IOException {
        // FileWriter(File) encodes with the JVM's default charset, as before.
        try (FileWriter fileWriter = new FileWriter(txtFile)) {
            fileWriter.write(content);
        }
    }

    /** Manual smoke run against a hard-coded sample PDF. */
    public static void main(String[] args) {
        readRdfFile(new File("G:/pdfpaper/ai/000437029400041X.pdf"));
    }

}
